import pandas as pd
import numpy as np
import plotly.express as px
# Load the dataset from a CSV file; the relative path is resolved against
# the current working directory
data = pd.read_csv("world_population.csv")
# Display the loaded dataset (the bare expression is rendered as the cell output)
data
| | Rank | CCA3 | Country/Territory | Capital | Continent | 2022 Population | 2020 Population | 2015 Population | 2010 Population | 2000 Population | 1990 Population | 1980 Population | 1970 Population | Area (km²) | Density (per km²) | Growth Rate | World Population Percentage |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 36 | AFG | Afghanistan | Kabul | Asia | 41128771 | 38972230 | 33753499 | 28189672 | 19542982 | 10694796 | 12486631 | 10752971 | 652230 | 63.0587 | 1.0257 | 0.52 |
| 1 | 138 | ALB | Albania | Tirana | Europe | 2842321 | 2866849 | 2882481 | 2913399 | 3182021 | 3295066 | 2941651 | 2324731 | 28748 | 98.8702 | 0.9957 | 0.04 |
| 2 | 34 | DZA | Algeria | Algiers | Africa | 44903225 | 43451666 | 39543154 | 35856344 | 30774621 | 25518074 | 18739378 | 13795915 | 2381741 | 18.8531 | 1.0164 | 0.56 |
| 3 | 213 | ASM | American Samoa | Pago Pago | Oceania | 44273 | 46189 | 51368 | 54849 | 58230 | 47818 | 32886 | 27075 | 199 | 222.4774 | 0.9831 | 0.00 |
| 4 | 203 | AND | Andorra | Andorra la Vella | Europe | 79824 | 77700 | 71746 | 71519 | 66097 | 53569 | 35611 | 19860 | 468 | 170.5641 | 1.0100 | 0.00 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 229 | 226 | WLF | Wallis and Futuna | Mata-Utu | Oceania | 11572 | 11655 | 12182 | 13142 | 14723 | 13454 | 11315 | 9377 | 142 | 81.4930 | 0.9953 | 0.00 |
| 230 | 172 | ESH | Western Sahara | El Aaiún | Africa | 575986 | 556048 | 491824 | 413296 | 270375 | 178529 | 116775 | 76371 | 266000 | 2.1654 | 1.0184 | 0.01 |
| 231 | 46 | YEM | Yemen | Sanaa | Asia | 33696614 | 32284046 | 28516545 | 24743946 | 18628700 | 13375121 | 9204938 | 6843607 | 527968 | 63.8232 | 1.0217 | 0.42 |
| 232 | 63 | ZMB | Zambia | Lusaka | Africa | 20017675 | 18927715 | 16248230 | 13792086 | 9891136 | 7686401 | 5720438 | 4281671 | 752612 | 26.5976 | 1.0280 | 0.25 |
| 233 | 74 | ZWE | Zimbabwe | Harare | Africa | 16320537 | 15669666 | 14154937 | 12839771 | 11834676 | 10113893 | 7049926 | 5202918 | 390757 | 41.7665 | 1.0204 | 0.20 |
234 rows × 17 columns
# Data Types and Missing Values
# info() prints, for every column, its dtype and non-null count, followed by
# the frame's memory usage
data.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 234 entries, 0 to 233 Data columns (total 17 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Rank 234 non-null int64 1 CCA3 234 non-null object 2 Country/Territory 234 non-null object 3 Capital 234 non-null object 4 Continent 234 non-null object 5 2022 Population 234 non-null int64 6 2020 Population 234 non-null int64 7 2015 Population 234 non-null int64 8 2010 Population 234 non-null int64 9 2000 Population 234 non-null int64 10 1990 Population 234 non-null int64 11 1980 Population 234 non-null int64 12 1970 Population 234 non-null int64 13 Area (km²) 234 non-null int64 14 Density (per km²) 234 non-null float64 15 Growth Rate 234 non-null float64 16 World Population Percentage 234 non-null float64 dtypes: float64(3), int64(10), object(4) memory usage: 31.2+ KB
# Show the Top Values
# head() called without an argument returns the first five rows
data.head()
| | Rank | CCA3 | Country/Territory | Capital | Continent | 2022 Population | 2020 Population | 2015 Population | 2010 Population | 2000 Population | 1990 Population | 1980 Population | 1970 Population | Area (km²) | Density (per km²) | Growth Rate | World Population Percentage |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 36 | AFG | Afghanistan | Kabul | Asia | 41128771 | 38972230 | 33753499 | 28189672 | 19542982 | 10694796 | 12486631 | 10752971 | 652230 | 63.0587 | 1.0257 | 0.52 |
| 1 | 138 | ALB | Albania | Tirana | Europe | 2842321 | 2866849 | 2882481 | 2913399 | 3182021 | 3295066 | 2941651 | 2324731 | 28748 | 98.8702 | 0.9957 | 0.04 |
| 2 | 34 | DZA | Algeria | Algiers | Africa | 44903225 | 43451666 | 39543154 | 35856344 | 30774621 | 25518074 | 18739378 | 13795915 | 2381741 | 18.8531 | 1.0164 | 0.56 |
| 3 | 213 | ASM | American Samoa | Pago Pago | Oceania | 44273 | 46189 | 51368 | 54849 | 58230 | 47818 | 32886 | 27075 | 199 | 222.4774 | 0.9831 | 0.00 |
| 4 | 203 | AND | Andorra | Andorra la Vella | Europe | 79824 | 77700 | 71746 | 71519 | 66097 | 53569 | 35611 | 19860 | 468 | 170.5641 | 1.0100 | 0.00 |
# Show the Bottom Values
# tail() called without an argument returns the last five rows
data.tail()
| | Rank | CCA3 | Country/Territory | Capital | Continent | 2022 Population | 2020 Population | 2015 Population | 2010 Population | 2000 Population | 1990 Population | 1980 Population | 1970 Population | Area (km²) | Density (per km²) | Growth Rate | World Population Percentage |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 229 | 226 | WLF | Wallis and Futuna | Mata-Utu | Oceania | 11572 | 11655 | 12182 | 13142 | 14723 | 13454 | 11315 | 9377 | 142 | 81.4930 | 0.9953 | 0.00 |
| 230 | 172 | ESH | Western Sahara | El Aaiún | Africa | 575986 | 556048 | 491824 | 413296 | 270375 | 178529 | 116775 | 76371 | 266000 | 2.1654 | 1.0184 | 0.01 |
| 231 | 46 | YEM | Yemen | Sanaa | Asia | 33696614 | 32284046 | 28516545 | 24743946 | 18628700 | 13375121 | 9204938 | 6843607 | 527968 | 63.8232 | 1.0217 | 0.42 |
| 232 | 63 | ZMB | Zambia | Lusaka | Africa | 20017675 | 18927715 | 16248230 | 13792086 | 9891136 | 7686401 | 5720438 | 4281671 | 752612 | 26.5976 | 1.0280 | 0.25 |
| 233 | 74 | ZWE | Zimbabwe | Harare | Africa | 16320537 | 15669666 | 14154937 | 12839771 | 11834676 | 10113893 | 7049926 | 5202918 | 390757 | 41.7665 | 1.0204 | 0.20 |
# Summary Statistics
# describe() reports count, mean, std, min, quartiles and max for the
# numeric columns only (the text columns are left out)
data.describe()
| | Rank | 2022 Population | 2020 Population | 2015 Population | 2010 Population | 2000 Population | 1990 Population | 1980 Population | 1970 Population | Area (km²) | Density (per km²) | Growth Rate | World Population Percentage |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 234.000000 | 2.340000e+02 | 2.340000e+02 | 2.340000e+02 | 2.340000e+02 | 2.340000e+02 | 2.340000e+02 | 2.340000e+02 | 2.340000e+02 | 2.340000e+02 | 234.000000 | 234.000000 | 234.000000 |
| mean | 117.500000 | 3.407441e+07 | 3.350107e+07 | 3.172996e+07 | 2.984524e+07 | 2.626947e+07 | 2.271022e+07 | 1.898462e+07 | 1.578691e+07 | 5.814494e+05 | 452.127044 | 1.009577 | 0.427051 |
| std | 67.694165 | 1.367664e+08 | 1.355899e+08 | 1.304050e+08 | 1.242185e+08 | 1.116982e+08 | 9.783217e+07 | 8.178519e+07 | 6.779509e+07 | 1.761841e+06 | 2066.121904 | 0.013385 | 1.714977 |
| min | 1.000000 | 5.100000e+02 | 5.200000e+02 | 5.640000e+02 | 5.960000e+02 | 6.510000e+02 | 7.000000e+02 | 7.330000e+02 | 7.520000e+02 | 1.000000e+00 | 0.026100 | 0.912000 | 0.000000 |
| 25% | 59.250000 | 4.197385e+05 | 4.152845e+05 | 4.046760e+05 | 3.931490e+05 | 3.272420e+05 | 2.641158e+05 | 2.296142e+05 | 1.559970e+05 | 2.650000e+03 | 38.417875 | 1.001775 | 0.010000 |
| 50% | 117.500000 | 5.559944e+06 | 5.493074e+06 | 5.307400e+06 | 4.942770e+06 | 4.292907e+06 | 3.825410e+06 | 3.141146e+06 | 2.604830e+06 | 8.119950e+04 | 95.346750 | 1.007900 | 0.070000 |
| 75% | 175.750000 | 2.247650e+07 | 2.144798e+07 | 1.973085e+07 | 1.915957e+07 | 1.576230e+07 | 1.186923e+07 | 9.826054e+06 | 8.817329e+06 | 4.304258e+05 | 238.933250 | 1.016950 | 0.280000 |
| max | 234.000000 | 1.425887e+09 | 1.424930e+09 | 1.393715e+09 | 1.348191e+09 | 1.264099e+09 | 1.153704e+09 | 9.823725e+08 | 8.225344e+08 | 1.709824e+07 | 23172.266700 | 1.069100 | 17.880000 |
# Show the Shape of data as a (rows, columns) tuple
data.shape
(234, 17)
# Count the Null Data
# isnull() flags each missing cell; sum() then totals the flags per column
data.isnull().sum()
Rank 0 CCA3 0 Country/Territory 0 Capital 0 Continent 0 2022 Population 0 2020 Population 0 2015 Population 0 2010 Population 0 2000 Population 0 1990 Population 0 1980 Population 0 1970 Population 0 Area (km²) 0 Density (per km²) 0 Growth Rate 0 World Population Percentage 0 dtype: int64
# Continent distribution: number of rows (countries/territories) per continent,
# shaped as a two-column frame with explicit column names
continent_counts = (
    data['Continent']
    .value_counts()
    .rename_axis('Continent')
    .reset_index(name='Count')
)
# Draw the per-continent counts as a Plotly Express bar chart
fig = px.bar(
    continent_counts,
    x='Continent',
    y='Count',
    title='Distribution of Countries by Continent',
    labels={'Continent': 'Continent', 'Count': 'Count'},
)
# Tilt the x-axis tick labels by 45 degrees so they stay readable
fig.update_layout(xaxis_tickangle=45)
# Render the figure
fig.show()
# Population distribution for 2022: take the single column of interest
population_2022 = data['2022 Population']
# Histogram of the 2022 populations, asking Plotly for roughly 20 bins
fig = px.histogram(
    population_2022,
    nbins=20,
    title='Population Distribution in 2022',
    labels={'value': '2022 Population', 'count': 'Count'},
)
# Render the figure
fig.show()
The dataset provided doesn't include any age or gender information, so a real age or gender distribution cannot be plotted from it. If you have another dataset that includes age or gender data, please provide it, and I'd be happy to help you create visualizations based on that data. In the meantime, the cells below generate random (synthetic) age and gender values purely to demonstrate the plotting code.
# Seed NumPy's global random generator so the synthetic columns are reproducible
np.random.seed(0)
# One random integer age in [1, 99] per row of the population data
age_data = np.random.randint(low=1, high=100, size=len(data))
# One random gender label per row, with 'Male' and 'Female' equally likely
gender_data = np.random.choice(['Male', 'Female'], p=[0.5, 0.5], size=len(data))
# Attach both synthetic arrays to the population DataFrame as new columns
data['Synthetic Age'], data['Synthetic Gender'] = age_data, gender_data
# Histogram of the synthetic ages, asking Plotly for roughly 20 bins
fig_age = px.histogram(
    data,
    x='Synthetic Age',
    nbins=20,
    title='Synthetic Age Distribution',
    labels={'Synthetic Age': 'Age', 'count': 'Count'},
)
# Render the age distribution figure
fig_age.show()
# Visualize gender distribution using Plotly
# Count the rows per synthetic gender and name both columns explicitly.
# The column names produced by value_counts().reset_index() differ between
# pandas versions ('index'/'Synthetic Gender' before 2.0,
# 'Synthetic Gender'/'count' from 2.0 on), so relying on the defaults makes
# the chart fail on newer pandas.
gender_counts = (
    data['Synthetic Gender']
    .value_counts()
    .rename_axis('Gender')
    .reset_index(name='Count')
)
# The column names double as the axis titles, so no label mapping is needed
fig_gender = px.bar(gender_counts, x='Gender', y='Count',
                    title='Synthetic Gender Distribution')
# Show the gender distribution plot
fig_gender.show()